In [1]:
from module import data
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# Notebook-wide plot styling in a single call: 'paper' context scaling,
# the 'ticks' axes style and seaborn's 'deep' colour palette.
sns.set_theme(context='paper', style='ticks', palette='deep')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2492 entries, 0 to 2491
Data columns (total 6 columns):
 #   Column                                  Non-Null Count  Dtype 
---  ------                                  --------------  ----- 
 0   Country                                 2492 non-null   object
 1   Year                                    2492 non-null   int64 
 2   Number of reported cases of cholera     2470 non-null   object
 3   Number of reported deaths from cholera  2375 non-null   object
 4   Cholera case fatality rate              2365 non-null   object
 5   WHO Region                              2492 non-null   object
dtypes: int64(1), object(5)
memory usage: 116.9+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2492 entries, 0 to 2491
Data columns (total 6 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Country                                 2492 non-null   object 
 1   Year                                    2492 non-null   int64  
 2   Number of reported cases of cholera     2469 non-null   float32
 3   Number of reported deaths from cholera  2373 non-null   float32
 4   Cholera case fatality rate              2363 non-null   float32
 5   WHO Region                              2492 non-null   object 
dtypes: float32(3), int64(1), object(2)
memory usage: 87.7+ KB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2352 entries, 0 to 2468
Data columns (total 6 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Country                                 2352 non-null   object 
 1   Year                                    2352 non-null   int64  
 2   Number of reported cases of cholera     2352 non-null   float32
 3   Number of reported deaths from cholera  2352 non-null   float32
 4   Cholera case fatality rate              2352 non-null   float32
 5   WHO Region                              2352 non-null   object 
dtypes: float32(3), int64(1), object(2)
memory usage: 101.1+ KB

Exploratory data analysis - Western Pacific¶

1. Mean number of cases, deaths, and fatality rate of cholera per country in the Western Pacific¶

In [2]:
# Keep only the rows whose WHO region is the Western Pacific.
data_western = data.loc[data['WHO Region'] == 'Western Pacific']

# Every numeric column except 'Year' gets its own chart.
metric_columns = [
    name
    for name in data_western.select_dtypes(include=np.number)
    if name != 'Year'
]

for metric in metric_columns:
    # Tall, narrow canvas: one horizontal bar per country.
    plt.figure(figsize=(5, 10))

    # No estimator is passed, so seaborn aggregates each country's rows with
    # its default estimator (the mean) and draws its default error bars.
    ax = sns.barplot(data=data_western, x=metric, y='Country', dodge=False)

    ax.tick_params(labelsize=15, pad=5)
    ax.set_title(
        f'{metric} - Western Pacific',
        fontsize=15,
        fontweight='bold',
        pad=20,
    )
    ax.set_ylabel('Country', fontsize=15)
    ax.set_xlabel(metric, fontsize=15)

    # Drop the top and right spines for a cleaner look.
    sns.despine()

    plt.show()
    # Blank line between consecutive charts in the cell output.
    print('\n')



2. Distribution of cases, deaths, and fatality rate over the years¶

In [3]:
# One interactive scatter per numeric measure, plotted against the year and
# coloured by country. 'Year' is the shared x axis, so it is not charted itself.
numeric_columns = data_western.select_dtypes(include=np.number)

for measure in (col for col in numeric_columns if col != 'Year'):
    scatter = px.scatter(
        data_frame=data_western,
        x='Year',
        y=measure,
        color='Country',
        width=800,
        title=measure,
    )
    scatter.show()

3. Number of cases and deaths per country per year¶

In [4]:
# For every country in the Western Pacific subset, draw two side-by-side line
# charts over the years: reported cases (left) and reported deaths (right).
panels = (
    ('Number of reported cases of cholera', 'Cases'),
    ('Number of reported deaths from cholera', 'Deaths'),
)

for i in data_western['Country'].unique():
    # Build the mask from data_western itself. A mask taken from the full
    # `data` frame only works through implicit index alignment and raises an
    # IndexingError as soon as data_western's index no longer matches data's.
    # Filtering once also avoids repeating the same selection for each panel.
    country_rows = data_western.loc[data_western['Country'] == i]

    fig, axes = plt.subplots(1, 2, figsize=(7, 3))

    for ax, (column, label) in zip(axes, panels):
        sns.lineplot(data=country_rows, x='Year', y=column, ax=ax)
        ax.set(title=f'{i} - {label}')

    fig.tight_layout(pad=0.8)
    sns.despine()
    plt.show()

4. Cases vs Deaths per country¶

In [5]:
# One interactive bar chart per country: bar height is the number of reported
# cases in a year, bar colour encodes the number of reported deaths that year.
for i in data_western['Country'].unique():
    # Mask built from data_western itself rather than from the full `data`
    # frame, so the selection does not depend on the two indexes lining up.
    country_rows = data_western.loc[data_western['Country'] == i]

    fig = px.bar(
        data_frame=country_rows,
        x='Year',
        y='Number of reported cases of cholera',
        width=1000,
        color='Number of reported deaths from cholera',
        # The colour column is numeric, so plotly uses a continuous colour
        # axis; the palette has to be given as color_continuous_scale
        # (color_discrete_sequence only applies to non-numeric colour columns).
        color_continuous_scale='viridis',
        title=f'{i} - Cases vs Deaths',
    )

    fig.show()

5. Correlation heatmap¶

In [6]:
# Pairwise correlations of the Western Pacific subset; numeric_only=True
# restricts the computation to the numeric columns.
correlations = data_western.corr(numeric_only=True)

# Annotated heatmap of the correlation matrix.
sns.heatmap(correlations, annot=True, cmap='inferno')
plt.show()

Conclusions:¶

  • The fatality rate appears to have decreased after the 1990s, perhaps due to the oral vaccine that became available around that time. There was a slight increase in cases and deaths in the early 1990s, possibly because of an outbreak.
  • Similar to other WHO regions, there are countries lacking information on cholera, like Kiribati, Marshall Islands, North Korea, Samoa, and others.
  • The number of cases and the number of deaths appear to be correlated, but further statistical analysis is needed to confirm the significance of this relationship.